Sales Data Forecast

1.Data process

1.1 Statistics data at year season and month

mass_data = proc_data(xlsx = './Data_for_test.xlsx',"DOUGHNUTS",1)

### food data porcess 
food_data = proc_data(xlsx = './Data_for_test.xlsx',"DOUGHNUTS",2)
### drug data porcess 

drug_data = proc_data(xlsx = './Data_for_test.xlsx',"DOUGHNUTS",3)

1.2 used TS func convert time series

#    1:2:3=month_vol,month_pr,month_turn,
#    4ï¼›5ï¼›6=years_vol,years_pr,years_turn,
#   7:8:9=quarters_vol,quarters_pr,quarters_turn
mass=proc_ts_data(mass_data)
food= proc_ts_data(food_data)
drug= proc_ts_data(drug_data)
head(food[[2]])
##           Jul      Aug      Sep      Oct      Nov      Dec
## 2004 2.503481 2.494095 2.473212 2.456530 2.486030 2.517373

2Forecast

2.1 time regression for year turnover data

autoplot(mass[[4]],main = "mass_vol")

autoplot(mass[[5]],main = "mass_pr")

autoplot(mass[[6]],main = "mass_turn")

autoplot(food[[4]],main = "food_vol")

autoplot(food[[5]],main = "food_pr")

autoplot(food[[6]],main = "food_turn")

autoplot(drug[[4]],main = "drug_vol")

autoplot(drug[[5]],main = "drug_pr")

autoplot(drug[[6]],main = "drug_turn")

## used tslm for year data
fc_tsml <- function(datas) {
  fc_year <- tslm(datas ~ trend )
fcast <- forecast(datas, h=3)
autoplot(fcast)
 return(fcast)
}
fc_year_mass_vol = fc_tsml(mass[[4]])
autoplot(fc_year_mass_vol,main="MASS Year VOL Forecast")

fc_year_mass_pr = fc_tsml(mass[[5]])
autoplot(fc_year_mass_pr,main="MASS Year PR Forecast")

fc_year_mass_turn = fc_tsml(mass[[6]])
autoplot(fc_year_mass_turn,main="MASS Year TURNOVER Forecast")

fc_year_food_vol = fc_tsml(food[[4]])
autoplot(fc_year_food_vol,main="FOOD Year VOL Forecast")

fc_year_food_pr = fc_tsml(food[[5]])
autoplot(fc_year_food_pr,main="FOOD Year PR Forecast")

fc_year_food_turn = fc_tsml(food[[6]])
autoplot(fc_year_food_turn,main="FOOD Year TURNOVER Forecast")

fc_year_drug_vol = fc_tsml(drug[[4]])
autoplot(fc_year_drug_vol,main="DRUG Year VOL Forecast")

fc_year_drug_pr = fc_tsml(drug[[5]])
autoplot(fc_year_drug_pr,main="DRUG Year PR Forecast")

fc_year_drug_turn = fc_tsml(drug[[6]])
autoplot(fc_year_drug_turn,main="DRUG Year TURNOVER Forecast")

### 2.2 Forecast season Data at ARIMA model

Manual ARIMA analysis of drug turnover data to predict outcomes over three years

autoplot(drug[[9]])

ggseasonplot(drug[[9]])

ndiffs(drug[[9]])
## [1] 1
nsdiffs(drug[[9]])
## [1] 1
# the result shows need 1 seasonal diff and 1 diff
drug[[9]]%>% diff(lag=4) %>% diff()%>% ggtsdisplay(main="")

auto.arima(drug[[9]])
## Series: drug[[9]] 
## ARIMA(1,0,0)(0,1,0)[4] with drift 
## 
## Coefficients:
##          ar1     drift
##       0.6519  51688.70
## s.e.  0.1786  15041.94
## 
## sigma^2 estimated as 9.663e+09:  log likelihood=-205.84
## AIC=417.69   AICc=419.69   BIC=420
# the auto arima used 1,0,0,  seasonal 0,1,0
drug[[9]]%>%
  Arima(order=c(1,1,0),seasonal=c(0,1,0))
## Series: . 
## ARIMA(1,1,0)(0,1,0)[4] 
## 
## Coefficients:
##           ar1
##       -0.0835
## s.e.   0.2573
## 
## sigma^2 estimated as 1.114e+10:  log likelihood=-194.28
## AIC=392.55   AICc=393.55   BIC=393.97
### 1,1,0 AICc = 393.55
drug[[9]]%>%
  Arima(order=c(0,1,1),seasonal=c(0,1,0))
## Series: . 
## ARIMA(0,1,1)(0,1,0)[4] 
## 
## Coefficients:
##           ma1
##       -0.0975
## s.e.   0.2744
## 
## sigma^2 estimated as 1.113e+10:  log likelihood=-194.27
## AIC=392.53   AICc=393.53   BIC=393.95
### 0,1,1   AICc=393.53 

drug[[9]]%>%
  Arima(order=c(0,1,1),seasonal=c(0,1,0)) %>%
  residuals() %>%
  ggtsdisplay()

# the 0,1,0 AICc value is smaller so choose 0,1,0
drug[[9]]%>%
  Arima(order=c(0,1,1),seasonal=c(0,1,0))->fit3
checkresiduals(fit3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(0,1,0)[4]
## Q* = 1.3378, df = 3, p-value = 0.7202
## 
## Model df: 1.   Total lags used: 4
fit3 %>% forecast(h=12) %>% autoplot()

used auto model Forecast data

autoplot(mass[[7]])

autoplot(mass[[8]])

autoplot(mass[[9]])

autoplot(food[[7]])

autoplot(food[[8]])

autoplot(food[[9]])

autoplot(drug[[7]])

autoplot(drug[[8]])

autoplot(drug[[9]])

arima_model <- function(datas) {
  arima_data =  auto.arima(datas)
  fc_arima = forecast(
  arima_data,
  h = 12
)
  return(fc_arima)
}
fc_season_mass_vol = arima_model(mass[[7]])
autoplot(fc_season_mass_vol,main="MASS Season VOL Forecast at ARIMA model")

fc_season_mass_pr = arima_model(mass[[8]])
autoplot(fc_season_mass_pr,main="MASS Season PR Forecast at ARIMA model")

fc_season_mass_turn = arima_model(mass[[9]])
autoplot(fc_season_mass_turn,main="MASS Season TURNOVER Forecast at ARIMA model")

fc_season_food_vol = arima_model(food[[7]])
autoplot(fc_season_food_vol,main="FOOD Season VOL Forecast at ARIMA model")

fc_season_food_pr = arima_model(food[[8]])
autoplot(fc_season_food_pr,main="FOOD Season PR Forecast at ARIMA model")

fc_season_food_turn = arima_model(food[[9]])
## Warning in auto.arima(datas): Having 3 or more differencing operations is not
## recommended. Please consider reducing the total number of differences.
autoplot(fc_season_food_turn,main="FOOD Season TURNOVER Forecast at ARIMA model")

fc_season_drug_vol = arima_model(drug[[7]])
autoplot(fc_season_drug_vol,main="DRUG Season VOL Forecast at ARIMA model")

fc_season_drug_pr = arima_model(drug[[8]])
autoplot(fc_season_drug_pr,main="DRUG Season PR Forecast at ARIMA model")

fc_season_drug_turn = arima_model(drug[[9]])
autoplot(fc_season_drug_turn,main="DRUG Season TURNOVER Forecast at ARIMA model")

2.3 Forecast month Data uesd Holt-Winters multiplicative model

Divide the data into a test set training set and use the aucc function to check the accuracy of the results

turn_train <- subset(drug[[1]],
                         end = length(drug[[1]]) - 36)
turn_test <- subset(drug[[1]],
                        start = length(drug[[1]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set    -86.56682 26750.55 22345.50 -0.1190131 3.253158 0.3094817
## Test set     -35635.43887 52377.40 43411.24 -4.7024032 5.555725 0.6012389
##                     ACF1 Theil's U
## Training set -0.02172905        NA
## Test set      0.59165871  1.175906
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 12.295, df = 3, p-value = 0.006438
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(drug[[2]],
                         end = length(drug[[2]]) - 36)
turn_test <- subset(drug[[2]],
                        start = length(drug[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                       ME       RMSE        MAE       MPE     MAPE      MASE
## Training set  0.01728991 0.04546837 0.03993854  1.047846 2.527095 0.3334443
## Test set     -0.04178733 0.10274426 0.07569580 -2.388376 4.242628 0.6319793
##                   ACF1 Theil's U
## Training set 0.5717204        NA
## Test set     0.7632717  2.861603
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 39.735, df = 3, p-value = 1.213e-08
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(drug[[3]],
                         end = length(drug[[3]]) - 36)
turn_test <- subset(drug[[3]],
                        start = length(drug[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set  -4345.238  38898.52 28969.79 -0.5735797 2.676801 0.1531004
## Test set     -78759.605 106253.58 92310.17 -5.1880235 6.212212 0.4878435
##                   ACF1 Theil's U
## Training set 0.2639860        NA
## Test set     0.6722729  1.281274
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 30.784, df = 3, p-value = 9.438e-07
## 
## Model df: 16.   Total lags used: 19
############
turn_train <- subset(mass[[1]],
                         end = length(mass[[1]]) - 36)
turn_test <- subset(mass[[1]],
                        start = length(mass[[1]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set  -253.6249  79914.31  65636.47 -3.275598 18.08164 0.5865116
## Test set     96811.6132 114753.72 101565.65 18.900778 20.02428 0.9075661
##                   ACF1 Theil's U
## Training set 0.6284858        NA
## Test set     0.5838898  2.950173
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 61.356, df = 3, p-value = 3.016e-13
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(mass[[2]],
                         end = length(mass[[2]]) - 36)
turn_test <- subset(mass[[2]],
                        start = length(mass[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                       ME      RMSE        MAE         MPE      MAPE      MASE
## Training set 0.001152132 0.1067935 0.08783875  0.03294332  3.511463 0.3358543
## Test set     0.551483521 0.6479076 0.55148352 21.79941653 21.799417 2.1086152
##                   ACF1 Theil's U
## Training set 0.1462183        NA
## Test set     0.8900559  7.174677
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 16.953, df = 3, p-value = 0.0007226
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(mass[[3]],
                         end = length(mass[[3]]) - 36)
turn_test <- subset(mass[[3]],
                        start = length(mass[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                       ME     RMSE      MAE      MPE     MAPE      MASE
## Training set   -482.3436 288624.6 228234.3 -4.13075 24.77814 0.6158965
## Test set     331772.6120 406376.3 348832.3 25.73767 27.36274 0.9413336
##                   ACF1 Theil's U
## Training set 0.5770897        NA
## Test set     0.6632676  4.253538
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 39.597, df = 3, p-value = 1.297e-08
## 
## Model df: 16.   Total lags used: 19
######
turn_train <- subset(food[[1]],
                         end = length(food[[1]]) - 36)
turn_test <- subset(food[[1]],
                        start = length(food[[1]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                     ME      RMSE      MAE         MPE      MAPE      MASE
## Training set -15178.79  264823.3 228501.3 -0.07565517 0.9742296 0.1668443
## Test set     436262.30 1308573.9 816205.5  2.03829249 3.8758715 0.5959671
##                   ACF1 Theil's U
## Training set 0.2923271        NA
## Test set     0.8120089  1.117446
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 26.835, df = 3, p-value = 6.375e-06
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(food[[2]],
                         end = length(food[[2]]) - 36)
turn_test <- subset(food[[2]],
                        start = length(food[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                        ME       RMSE        MAE        MPE      MAPE      MASE
## Training set  0.006343403 0.01550418 0.01268765  0.2598285 0.5214184 0.2196819
## Test set     -0.103973783 0.12510943 0.10397378 -4.1432011 4.1432011 1.8002664
##                   ACF1 Theil's U
## Training set 0.3540895        NA
## Test set     0.7880882   4.59655
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 56.359, df = 3, p-value = 3.522e-12
## 
## Model df: 16.   Total lags used: 19
turn_train <- subset(food[[3]],
                         end = length(food[[3]]) - 36)
turn_test <- subset(food[[3]],
                        start = length(food[[3]]) - 35)
hw_mul_turn_train <- hw(turn_train,
                            h = 36,
                            seasonal = "multiplicative")
autoplot(hw_mul_turn_train)

accuracy(hw_mul_turn_train, turn_test)
##                      ME      RMSE       MAE         MPE      MAPE       MASE
## Training set  -32744.94  496560.6  411990.9 -0.06430237 0.7264877 0.09371163
## Test set     4898255.72 6565875.8 4970597.1  9.39394925 9.5339580 1.13061417
##                     ACF1 Theil's U
## Training set 0.007484374        NA
## Test set     0.892483018   2.49863
checkresiduals(hw_mul_turn_train)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 27.673, df = 3, p-value = 4.254e-06
## 
## Model df: 16.   Total lags used: 19

Automated forecasting using data models directly

autoplot(mass[[1]],main = "mass_vol")

autoplot(mass[[2]],main = "mass_pr")

autoplot(mass[[3]],main = "mass_turn")

autoplot(food[[1]],main = "food_vol")

autoplot(food[[2]],main = "food_pr")

autoplot(food[[3]],main = "food_turn")

autoplot(drug[[1]],main = "drug_vol")

autoplot(drug[[2]],main = "drug_pr")

autoplot(drug[[3]],main = "drug_turn")

hw_model <- function(datas) {
  hw_mul <- hw(datas,
                            h = 36,
                            seasonal = "multiplicative")
  return(hw_mul)
}
fc_month_mass_vol = hw_model(mass[[1]])
autoplot(fc_month_mass_vol,main="MASS month VOL Forecast at Holt-Winters multiplicative model")

fc_month_mass_pr = hw_model(mass[[2]])
autoplot(fc_month_mass_pr,main="MASS month PR Forecast at Holt-Winters multiplicative model")

fc_month_mass_turn = hw_model(mass[[3]])
autoplot(fc_month_mass_turn,main="MASS month TURNOVER Forecast at Holt-Winters multiplicative model")

fc_month_food_vol = hw_model(food[[1]])
autoplot(fc_month_food_vol,main="FOOD month VOL Forecast at Holt-Winters multiplicative model")

fc_month_food_pr = hw_model(food[[2]])
autoplot(fc_month_food_pr,main="FOOD month PR Forecast at Holt-Winters multiplicative model")

fc_month_food_turn = hw_model(food[[3]])
autoplot(fc_month_food_turn,main="FOOD month TURNOVER Forecast at Holt-Winters multiplicative model")

fc_month_drug_vol = hw_model(drug[[1]])
autoplot(fc_month_drug_vol,main="DRUG month VOL Forecast at Holt-Winters multiplicative model")

fc_month_drug_pr = hw_model(drug[[2]])
autoplot(fc_month_drug_pr,main="DRUG month PR Forecast at Holt-Winters multiplicative model")

fc_month_drug_turn = hw_model(drug[[3]])
autoplot(fc_month_drug_turn,main="DRUG month TURNOVER Forecast at Holt-Winters multiplicative model")